# coding:utf8

import os
from pyspark import *

# os.environ['PYSPARK_PYTHON']='D:/ProgramFiles/anaconda3/envs/pyspark/python.exe'
# os.environ["pyspark_python"]="D:\ProgramFiles\python\python3.10.4\python.exe"

if __name__ == '__main__':
    # Word-count job: read a text file, split each line on spaces,
    # and count occurrences of every word with reduceByKey.
    conf = SparkConf().setAppName("HelloWorld")
    # Uncomment to run locally instead of submitting to a cluster:
    # conf.setMaster("local[*]")
    sc = SparkContext(conf=conf)
    try:
        # Print working directories to help debug relative-path issues.
        print(os.getcwd())
        print(os.path.dirname(os.getcwd()))

        # Local-filesystem variant kept for reference; the HDFS path below
        # is the one actually used (the original assigned both in sequence,
        # making the first assignment dead code).
        # input_path = 'file://' + os.path.dirname(os.getcwd()) + '/data/00_example_HelloWorld.txt'
        input_path = "hdfs://hadoop3cluster/updown/WordCountInput.txt"

        result = sc.textFile(input_path) \
            .flatMap(lambda line: line.split(" ")) \
            .map(lambda word: (word, 1)) \
            .reduceByKey(lambda a, b: a + b)
        print(result.collect())
    finally:
        # Always release cluster resources, even if the job raises.
        sc.stop()

#